In [2]:
import os
import time
import numpy as np
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt

import seaborn as sns
sns.set_context('notebook', font_scale=1.5, rc={'lines.markeredgewidth': 2})
In [3]:
from sklearn.cluster import SpectralClustering
In [4]:
import visual_behavior_glm.GLM_analysis_tools as gat
In [5]:
import visual_behavior.visualization.utils as utils
import visual_behavior.data_access.loading as loading
import visual_behavior.data_access.utilities as utilities
In [6]:
from visual_behavior.dimensionality_reduction.clustering import processing
from visual_behavior.dimensionality_reduction.clustering import plotting
In [7]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

get experiments and cells tables and limit to closest familiar and novel active

In [8]:
experiments_table = loading.get_platform_paper_experiment_table()
len(experiments_table)
Out[8]:
1249
In [9]:
# limit to closest familiar and novel active
experiments_table = utilities.limit_to_last_familiar_second_novel_active(experiments_table)
experiments_table = utilities.limit_to_containers_with_all_experience_levels(experiments_table)
len(experiments_table)
Out[9]:
402
In [10]:
matched_experiments = experiments_table.index.values
In [11]:
cells_table = loading.get_cell_table()
len(cells_table.cell_specimen_id.unique())
Out[11]:
28833
In [12]:
cells_table = loading.get_matched_cells_table(cells_table)
matched_cells = cells_table.cell_specimen_id.unique()
3921 cells in matched cells table
In [13]:
# get cre_lines and cell types for plot labels
cre_lines = np.sort(cells_table.cre_line.unique())
cell_types = utilities.get_cell_types_dict(cre_lines, experiments_table)

get GLM output, filter and reshape

In [14]:
glm_version = '24_events_all_L2_optimize_by_session'
model_output_type = 'adj_fraction_change_from_full'
In [ ]:
base_dir = r'\\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4'
base_dir = os.path.join(base_dir, glm_version)
if not os.path.exists(base_dir):
    os.mkdir(base_dir)

NOTE: create a new folder if you are testing out this notebook or else it will overwrite existing files & plots

In [15]:
# folder = '220310_across_session_norm'
save_dir = os.path.join(base_dir, folder)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
In [16]:
features = processing.get_features_for_clustering()

get across session normalized dropout scores

In [ ]:
# # get GLM results from saved file in save_dir or from mongo if file doesnt exist
# results_pivoted = processing.get_glm_results_pivoted_for_clustering(glm_version, model_output_type, save_dir)
# results_pivoted.head()
In [25]:
import visual_behavior_glm.GLM_across_session as gas

df = gas.load_cells(cells='all')
1086494804 could not be loaded
1086494677 could not be loaded
1086494537 could not be loaded
1086494288 could not be loaded
1086494248 could not be loaded
1086494176 could not be loaded
1086494076 could not be loaded
1086493993 could not be loaded
1086493543 could not be loaded
1086493441 could not be loaded
1086493384 could not be loaded
1086493213 could not be loaded
1086493082 could not be loaded
1086492984 could not be loaded
1086492915 could not be loaded
In [30]:
df = df.rename(columns={'ophys_experiment_id_x':'ophys_experiment_id', 'cell_specimen_id_x':'cell_specimen_id'})
df = df.set_index('cell_specimen_id')

within = df[[key for key in df.keys() if '_within' in key]+['ophys_experiment_id', 'experience_level']]
across = df[[key for key in df.keys() if '_across' in key]+['ophys_experiment_id', 'experience_level']]
In [82]:
results_pivoted = across.copy()
results_pivoted = results_pivoted.rename(columns={'omissions_across':'omissions', 'all-images_across':'all-images', 
                                         'behavioral_across':'behavioral', 'task_across':'task'})
print(len(results_pivoted))
11718
In [83]:
results_pivoted.head()
Out[83]:
omissions all-images behavioral task ophys_experiment_id experience_level
cell_specimen_id
1086551315 0.0 0.000000 0.0 0.000000 794381992 Familiar
1086551315 0.0 -0.295018 0.0 -0.094833 795076128 Novel 1
1086551315 0.0 -0.258126 0.0 -0.430496 796105304 Novel >1
1086550804 0.0 0.000000 0.0 0.000000 794381992 Familiar
1086550804 0.0 0.000000 0.0 0.000000 795076128 Novel 1

limit to strictly matched cells

In [84]:
# get rid of passive sessions

# get cells table and limit to cells matched in closest familiar and novel active
cells_table = loading.get_cell_table()
cells_table = utilities.limit_to_last_familiar_second_novel_active(cells_table)
cells_table = utilities.limit_to_containers_with_all_experience_levels(cells_table)
cells_table = utilities.limit_to_cell_specimen_ids_matched_in_all_experience_levels(cells_table)
print(len(cells_table.cell_specimen_id.unique()), 'cells in cells_table after limiting to strictly matched cells')

# get matched cells and experiments to limit to
matched_experiments = cells_table.ophys_experiment_id.unique()
matched_cells = cells_table.cell_specimen_id.unique()

# limit results pivoted to to last familiar and second novel
results_pivoted = results_pivoted[results_pivoted.ophys_experiment_id.isin(matched_experiments)]
results_pivoted = results_pivoted.loc[matched_cells]
print(len(results_pivoted.index.unique()), 'cells in results_pivoted after limiting to strictly matched cells')
3921 cells in cells_table after limiting to strictly matched cells
3921 cells in results_pivoted after limiting to strictly matched cells
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\ipykernel_launcher.py:16: FutureWarning:


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike

drop duplicates

In [95]:
results_pivoted = results_pivoted.reset_index().drop_duplicates(subset=['cell_specimen_id', 'experience_level']).set_index('cell_specimen_id')
len(results_pivoted)
Out[95]:
11733
In [97]:
len(results_pivoted.index.unique())
Out[97]:
3921

save results_pivoted

In [211]:
results_pivoted.to_hdf(os.path.join(save_dir, glm_version + '_results_pivoted.h5'), key='df')
C:\Users\marinag\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py:2530: PerformanceWarning:


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->['experience_level']]


continue processing

In [116]:
dropouts = results_pivoted.copy()
In [118]:
dropouts = dropouts.drop(columns='ophys_experiment_id')

# sort by features
features = processing.get_features_for_clustering()
columns = [*features, 'experience_level']
dropouts = dropouts[columns]

# make everything positive
for feature in features:
    dropouts[feature] = np.abs(dropouts[feature])
In [120]:
# unstack dropout scores to get a vector of features x experience levels for each cell
feature_matrix = processing.get_feature_matrix_for_clustering(dropouts, glm_version, save_dir=save_dir)
feature_matrix.head()
found 7812 duplicated cells. But not removed. This needs to be fixed
3906
Out[120]:
all-images omissions behavioral task
experience_level Familiar Novel 1 Novel >1 Familiar Novel 1 Novel >1 Familiar Novel 1 Novel >1 Familiar Novel 1 Novel >1
cell_specimen_id
1086489847 0.000000 0.060457 0.017832 0.000000 0.000000 0.0 0.000000 0.084897 0.971563 0.000000 0.004045 0.009827
1086489860 0.000000 0.910349 0.000000 0.000000 0.109417 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1086489891 0.000000 0.279929 0.000000 0.524089 0.000000 0.0 0.168301 0.008724 1.000000 0.000000 0.025777 0.000000
1086489976 0.817370 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1086490002 0.852927 0.000000 0.355870 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.021871 0.000000 0.000000
In [121]:
# get cell metadata for all cells in feature_matrix
cell_metadata = processing.get_cell_metadata_for_feature_matrix(feature_matrix, cells_table)
cell_metadata.head()
3906 cells in cell_metadata for feature_matrix
Out[121]:
ophys_experiment_id equipment_name full_genotype mouse_id reporter_line driver_line sex age_in_days cre_line indicator ... area_binned_depth layer date first_novel n_relative_to_first_novel last_familiar last_familiar_active second_novel second_novel_active experience_exposure
cell_specimen_id
1086551315 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086550804 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086541251 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086540341 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3
1086539950 794381992 CAM2P.4 Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G... 412366 Ai93(TITL-GCaMP6f) [Slc17a7-IRES2-Cre, Camk2a-tTA] F 149.0 Slc17a7-IRES2-Cre GCaMP6f ... VISp_375 lower 20181212 False -1.0 True True False False Familiar 3

5 rows × 40 columns

plot feature matrix for each cre line

In [122]:
plotting.plot_feature_matrix_for_cre_lines(feature_matrix, cell_metadata, save_dir=base_dir, folder=folder)

get Silhouette scores

In [130]:
silhouette_scores = processing.load_silhouette_scores(glm_version, feature_matrix, cell_metadata, save_dir)
size of X = (400, 12)
NaNs in the array = 0
n 2 clusters mean score = 0.17658571012962376
n 3 clusters mean score = 0.20890017538481417
n 4 clusters mean score = 0.2074041559563443
n 5 clusters mean score = 0.22968052630647592
n 6 clusters mean score = 0.23857762434875568
n 7 clusters mean score = 0.23949704303340558
n 8 clusters mean score = 0.2585196565769154
n 9 clusters mean score = 0.2686733484678343
n 10 clusters mean score = 0.2575824677000889
n 11 clusters mean score = 0.25471528278374006
n 12 clusters mean score = 0.24582193043517803
n 13 clusters mean score = 0.23703900031676586
n 14 clusters mean score = 0.2504590236519718
n 15 clusters mean score = 0.2547866036307835
n 16 clusters mean score = 0.24915284119228304
n 17 clusters mean score = 0.24540307160898261
n 18 clusters mean score = 0.23858690585075312
n 19 clusters mean score = 0.2463828745476957
n 20 clusters mean score = 0.2430383670352158
n 21 clusters mean score = 0.2332319170182049
n 22 clusters mean score = 0.23638382540939945
n 23 clusters mean score = 0.23244649168035175
n 24 clusters mean score = 0.236092816572463
n 25 clusters mean score = 0.23075855745348545
n 26 clusters mean score = 0.23188732284904806
n 27 clusters mean score = 0.23083231365636486
n 28 clusters mean score = 0.23234897878921218
n 29 clusters mean score = 0.2322033505130387
size of X = (200, 12)
NaNs in the array = 0
n 2 clusters mean score = 0.3019472180910475
n 3 clusters mean score = 0.29308289892187134
n 4 clusters mean score = 0.33831442452011035
n 5 clusters mean score = 0.28523802592938907
n 6 clusters mean score = 0.25767881311021745
n 7 clusters mean score = 0.2754707193203212
n 8 clusters mean score = 0.25657906865205354
n 9 clusters mean score = 0.24514845430530294
n 10 clusters mean score = 0.23430576043122273
n 11 clusters mean score = 0.2432845137791638
n 12 clusters mean score = 0.23462609340780535
n 13 clusters mean score = 0.2302814916669201
n 14 clusters mean score = 0.22470247036979582
n 15 clusters mean score = 0.21662796653666963
n 16 clusters mean score = 0.21548702047240664
n 17 clusters mean score = 0.20307854059922809
n 18 clusters mean score = 0.19736149011436752
n 19 clusters mean score = 0.20409394926718588
n 20 clusters mean score = 0.1929178208109101
n 21 clusters mean score = 0.1889080451865582
n 22 clusters mean score = 0.18256334449208286
n 23 clusters mean score = 0.18941767401479007
n 24 clusters mean score = 0.18005467351458077
n 25 clusters mean score = 0.18841940546215505
n 26 clusters mean score = 0.18302213707106713
n 27 clusters mean score = 0.17728959729994478
n 28 clusters mean score = 0.13674054495416377
n 29 clusters mean score = 0.12466419470159514
size of X = (3306, 12)
NaNs in the array = 0
n 2 clusters mean score = 0.3149973778737375
n 3 clusters mean score = 0.36689291493749093
n 4 clusters mean score = 0.4547645749055188
n 5 clusters mean score = 0.44519244738515534
n 6 clusters mean score = 0.44059204780945127
n 7 clusters mean score = 0.4519727669147902
n 8 clusters mean score = 0.4508328375551397
n 9 clusters mean score = 0.4428667264433145
n 10 clusters mean score = 0.4390438374052691
n 11 clusters mean score = 0.44174936690933875
n 12 clusters mean score = 0.4418024249691571
n 13 clusters mean score = 0.43819534309012537
n 14 clusters mean score = 0.43942906438125934
n 15 clusters mean score = 0.4226921957174364
n 16 clusters mean score = 0.3979430039964223
n 17 clusters mean score = 0.3862029026398135
n 18 clusters mean score = 0.38901380396068114
n 19 clusters mean score = 0.3862459228093757
n 20 clusters mean score = 0.38639277506028524
n 21 clusters mean score = 0.38750635002352735
n 22 clusters mean score = 0.38230973843821847
n 23 clusters mean score = 0.38163703623509526
n 24 clusters mean score = 0.37565327235288604
n 25 clusters mean score = 0.37419634720116296
n 26 clusters mean score = 0.3688492846978232
n 27 clusters mean score = 0.3632876435912514
n 28 clusters mean score = 0.3642749941922404
n 29 clusters mean score = 0.36442523087447337

select number of clusters

In [132]:
n_clusters_cre = {'Slc17a7-IRES2-Cre': 10,
                 'Sst-IRES-Cre': 6, 
                 'Vip-IRES-Cre':12}

plot silhouettes with selected # clusters

In [133]:
plotting.plot_silhouette_scores_n_clusters(silhouette_scores, cell_metadata, n_clusters_cre=n_clusters_cre, save_dir=base_dir, folder=folder)

get coclustering matrices per cre line

In [134]:
coclustering_matrices = processing.get_coclustering_matrix(glm_version, feature_matrix, cell_metadata, n_clusters_cre, save_dir, nboot=100)
  0%|                                                                                          | 0/100 [00:00<?, ?it/s]
  1%|▊                                                                                 | 1/100 [00:00<00:18,  5.25it/s]
  2%|█▋                                                                                | 2/100 [00:00<00:17,  5.62it/s]
  3%|██▍                                                                               | 3/100 [00:00<00:18,  5.32it/s]
  4%|███▎                                                                              | 4/100 [00:00<00:17,  5.39it/s]
  5%|████                                                                              | 5/100 [00:00<00:17,  5.46it/s]
  6%|████▉                                                                             | 6/100 [00:01<00:16,  5.70it/s]
  7%|█████▋                                                                            | 7/100 [00:01<00:16,  5.50it/s]
  8%|██████▌                                                                           | 8/100 [00:01<00:21,  4.24it/s]
  9%|███████▍                                                                          | 9/100 [00:01<00:21,  4.21it/s]
 10%|████████                                                                         | 10/100 [00:02<00:20,  4.40it/s]
 11%|████████▉                                                                        | 11/100 [00:02<00:19,  4.55it/s]
 12%|█████████▋                                                                       | 12/100 [00:02<00:18,  4.69it/s]
 13%|██████████▌                                                                      | 13/100 [00:02<00:18,  4.74it/s]
 14%|███████████▎                                                                     | 14/100 [00:02<00:17,  4.95it/s]
 15%|████████████▏                                                                    | 15/100 [00:03<00:17,  4.82it/s]
 16%|████████████▉                                                                    | 16/100 [00:03<00:18,  4.50it/s]
 17%|█████████████▊                                                                   | 17/100 [00:03<00:18,  4.54it/s]
 18%|██████████████▌                                                                  | 18/100 [00:03<00:19,  4.12it/s]
 19%|███████████████▍                                                                 | 19/100 [00:04<00:18,  4.49it/s]
 20%|████████████████▏                                                                | 20/100 [00:04<00:16,  4.72it/s]
 21%|█████████████████                                                                | 21/100 [00:04<00:15,  4.98it/s]
 22%|█████████████████▊                                                               | 22/100 [00:04<00:16,  4.80it/s]
 23%|██████████████████▋                                                              | 23/100 [00:04<00:16,  4.80it/s]
 24%|███████████████████▍                                                             | 24/100 [00:05<00:15,  4.94it/s]
 25%|████████████████████▎                                                            | 25/100 [00:05<00:14,  5.19it/s]
 26%|█████████████████████                                                            | 26/100 [00:05<00:16,  4.54it/s]
 27%|█████████████████████▊                                                           | 27/100 [00:05<00:18,  4.02it/s]
 28%|██████████████████████▋                                                          | 28/100 [00:05<00:16,  4.44it/s]
 29%|███████████████████████▍                                                         | 29/100 [00:06<00:14,  4.80it/s]
 30%|████████████████████████▎                                                        | 30/100 [00:06<00:14,  4.70it/s]
 31%|█████████████████████████                                                        | 31/100 [00:06<00:14,  4.74it/s]
 32%|█████████████████████████▉                                                       | 32/100 [00:06<00:13,  4.91it/s]
 33%|██████████████████████████▋                                                      | 33/100 [00:06<00:14,  4.59it/s]
 34%|███████████████████████████▌                                                     | 34/100 [00:07<00:13,  4.78it/s]
 35%|████████████████████████████▎                                                    | 35/100 [00:07<00:13,  4.85it/s]
 36%|█████████████████████████████▏                                                   | 36/100 [00:07<00:12,  5.04it/s]
 37%|█████████████████████████████▉                                                   | 37/100 [00:07<00:12,  4.86it/s]
 38%|██████████████████████████████▊                                                  | 38/100 [00:08<00:13,  4.60it/s]
 39%|███████████████████████████████▌                                                 | 39/100 [00:08<00:12,  4.94it/s]
 40%|████████████████████████████████▍                                                | 40/100 [00:08<00:13,  4.49it/s]
 41%|█████████████████████████████████▏                                               | 41/100 [00:08<00:12,  4.66it/s]
 42%|██████████████████████████████████                                               | 42/100 [00:08<00:12,  4.64it/s]
 43%|██████████████████████████████████▊                                              | 43/100 [00:09<00:13,  4.27it/s]
 44%|███████████████████████████████████▋                                             | 44/100 [00:09<00:12,  4.46it/s]
 45%|████████████████████████████████████▍                                            | 45/100 [00:09<00:12,  4.38it/s]
 46%|█████████████████████████████████████▎                                           | 46/100 [00:09<00:11,  4.73it/s]
 47%|██████████████████████████████████████                                           | 47/100 [00:10<00:12,  4.26it/s]
 48%|██████████████████████████████████████▉                                          | 48/100 [00:10<00:11,  4.52it/s]
 49%|███████████████████████████████████████▋                                         | 49/100 [00:10<00:09,  5.21it/s]
 50%|████████████████████████████████████████▌                                        | 50/100 [00:10<00:09,  5.02it/s]
 51%|█████████████████████████████████████████▎                                       | 51/100 [00:10<00:10,  4.56it/s]
 52%|██████████████████████████████████████████                                       | 52/100 [00:11<00:10,  4.50it/s]
 53%|██████████████████████████████████████████▉                                      | 53/100 [00:11<00:10,  4.59it/s]
 54%|███████████████████████████████████████████▋                                     | 54/100 [00:11<00:10,  4.51it/s]
 55%|████████████████████████████████████████████▌                                    | 55/100 [00:11<00:10,  4.40it/s]
 56%|█████████████████████████████████████████████▎                                   | 56/100 [00:11<00:10,  4.31it/s]
 57%|██████████████████████████████████████████████▏                                  | 57/100 [00:12<00:09,  4.68it/s]
 58%|██████████████████████████████████████████████▉                                  | 58/100 [00:12<00:08,  4.97it/s]
 59%|███████████████████████████████████████████████▊                                 | 59/100 [00:12<00:07,  5.15it/s]
 60%|████████████████████████████████████████████████▌                                | 60/100 [00:12<00:07,  5.48it/s]
 61%|█████████████████████████████████████████████████▍                               | 61/100 [00:12<00:07,  4.93it/s]
 62%|██████████████████████████████████████████████████▏                              | 62/100 [00:13<00:07,  5.05it/s]
 63%|███████████████████████████████████████████████████                              | 63/100 [00:13<00:06,  5.42it/s]
 64%|███████████████████████████████████████████████████▊                             | 64/100 [00:13<00:06,  5.66it/s]
 65%|████████████████████████████████████████████████████▋                            | 65/100 [00:13<00:06,  5.32it/s]
 66%|█████████████████████████████████████████████████████▍                           | 66/100 [00:13<00:06,  5.32it/s]
 67%|██████████████████████████████████████████████████████▎                          | 67/100 [00:13<00:06,  5.38it/s]
 68%|███████████████████████████████████████████████████████                          | 68/100 [00:14<00:05,  5.63it/s]
 69%|███████████████████████████████████████████████████████▉                         | 69/100 [00:14<00:05,  5.52it/s]
 70%|████████████████████████████████████████████████████████▋                        | 70/100 [00:14<00:05,  5.31it/s]
 71%|█████████████████████████████████████████████████████████▌                       | 71/100 [00:14<00:06,  4.79it/s]
 72%|██████████████████████████████████████████████████████████▎                      | 72/100 [00:15<00:06,  4.48it/s]
 73%|███████████████████████████████████████████████████████████▏                     | 73/100 [00:15<00:05,  4.67it/s]
 74%|███████████████████████████████████████████████████████████▉                     | 74/100 [00:15<00:05,  5.09it/s]
 75%|████████████████████████████████████████████████████████████▊                    | 75/100 [00:15<00:04,  5.30it/s]
 76%|█████████████████████████████████████████████████████████████▌                   | 76/100 [00:15<00:04,  5.51it/s]
 77%|██████████████████████████████████████████████████████████████▎                  | 77/100 [00:15<00:04,  5.04it/s]
 78%|███████████████████████████████████████████████████████████████▏                 | 78/100 [00:16<00:04,  4.87it/s]
 79%|███████████████████████████████████████████████████████████████▉                 | 79/100 [00:16<00:04,  4.89it/s]
 80%|████████████████████████████████████████████████████████████████▊                | 80/100 [00:16<00:04,  4.70it/s]
 81%|█████████████████████████████████████████████████████████████████▌               | 81/100 [00:16<00:04,  4.60it/s]
 82%|██████████████████████████████████████████████████████████████████▍              | 82/100 [00:17<00:03,  4.65it/s]
 83%|███████████████████████████████████████████████████████████████████▏             | 83/100 [00:17<00:03,  4.73it/s]
 84%|████████████████████████████████████████████████████████████████████             | 84/100 [00:17<00:03,  4.87it/s]
 85%|████████████████████████████████████████████████████████████████████▊            | 85/100 [00:17<00:03,  4.92it/s]
 86%|█████████████████████████████████████████████████████████████████████▋           | 86/100 [00:17<00:02,  5.10it/s]
 87%|██████████████████████████████████████████████████████████████████████▍          | 87/100 [00:18<00:02,  4.95it/s]
 88%|███████████████████████████████████████████████████████████████████████▎         | 88/100 [00:18<00:02,  4.76it/s]
 89%|████████████████████████████████████████████████████████████████████████         | 89/100 [00:18<00:02,  5.00it/s]
 90%|████████████████████████████████████████████████████████████████████████▉        | 90/100 [00:18<00:02,  4.81it/s]
 91%|█████████████████████████████████████████████████████████████████████████▋       | 91/100 [00:18<00:01,  5.13it/s]
 92%|██████████████████████████████████████████████████████████████████████████▌      | 92/100 [00:19<00:01,  4.85it/s]
 93%|███████████████████████████████████████████████████████████████████████████▎     | 93/100 [00:19<00:01,  4.99it/s]
 94%|████████████████████████████████████████████████████████████████████████████▏    | 94/100 [00:19<00:01,  4.75it/s]
 95%|████████████████████████████████████████████████████████████████████████████▉    | 95/100 [00:19<00:01,  4.92it/s]
 96%|█████████████████████████████████████████████████████████████████████████████▊   | 96/100 [00:19<00:00,  5.30it/s]
 97%|██████████████████████████████████████████████████████████████████████████████▌  | 97/100 [00:20<00:00,  5.20it/s]
 98%|███████████████████████████████████████████████████████████████████████████████▍ | 98/100 [00:20<00:00,  4.81it/s]
 99%|████████████████████████████████████████████████████████████████████████████████▏| 99/100 [00:20<00:00,  4.81it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:20<00:00,  4.83it/s]

  0%|                                                                                          | 0/100 [00:00<?, ?it/s]
  2%|█▋                                                                                | 2/100 [00:00<00:10,  9.58it/s]
  3%|██▍                                                                               | 3/100 [00:00<00:10,  9.44it/s]
  4%|███▎                                                                              | 4/100 [00:00<00:10,  9.25it/s]
  6%|████▉                                                                             | 6/100 [00:00<00:09,  9.85it/s]
  7%|█████▋                                                                            | 7/100 [00:00<00:09,  9.63it/s]
  9%|███████▍                                                                          | 9/100 [00:00<00:09,  9.85it/s]
 11%|████████▉                                                                        | 11/100 [00:01<00:08, 10.11it/s]
 12%|█████████▋                                                                       | 12/100 [00:01<00:10,  8.52it/s]
 13%|██████████▌                                                                      | 13/100 [00:01<00:09,  8.79it/s]
 14%|███████████▎                                                                     | 14/100 [00:01<00:10,  8.57it/s]
 16%|████████████▉                                                                    | 16/100 [00:01<00:09,  9.25it/s]
 17%|█████████████▊                                                                   | 17/100 [00:01<00:09,  8.92it/s]
 19%|███████████████▍                                                                 | 19/100 [00:01<00:08,  9.26it/s]
 20%|████████████████▏                                                                | 20/100 [00:02<00:09,  8.52it/s]
 22%|█████████████████▊                                                               | 22/100 [00:02<00:08,  8.85it/s]
 23%|██████████████████▋                                                              | 23/100 [00:02<00:08,  8.78it/s]
 24%|███████████████████▍                                                             | 24/100 [00:02<00:09,  8.02it/s]
 26%|█████████████████████                                                            | 26/100 [00:02<00:08,  8.36it/s]
 28%|██████████████████████▋                                                          | 28/100 [00:02<00:07,  9.19it/s]
 30%|████████████████████████▎                                                        | 30/100 [00:03<00:07,  9.41it/s]
 31%|█████████████████████████                                                        | 31/100 [00:03<00:07,  9.00it/s]
 33%|██████████████████████████▋                                                      | 33/100 [00:03<00:07,  9.51it/s]
 34%|███████████████████████████▌                                                     | 34/100 [00:03<00:07,  8.27it/s]
 35%|████████████████████████████▎                                                    | 35/100 [00:03<00:08,  7.55it/s]
 37%|█████████████████████████████▉                                                   | 37/100 [00:04<00:07,  8.00it/s]
 38%|██████████████████████████████▊                                                  | 38/100 [00:04<00:08,  7.53it/s]
 39%|███████████████████████████████▌                                                 | 39/100 [00:04<00:08,  7.42it/s]
 40%|████████████████████████████████▍                                                | 40/100 [00:04<00:08,  7.17it/s]
 41%|█████████████████████████████████▏                                               | 41/100 [00:04<00:07,  7.48it/s]
 43%|██████████████████████████████████▊                                              | 43/100 [00:04<00:07,  7.85it/s]
 45%|████████████████████████████████████▍                                            | 45/100 [00:04<00:06,  8.39it/s]
 46%|█████████████████████████████████████▎                                           | 46/100 [00:05<00:06,  8.55it/s]
 47%|██████████████████████████████████████                                           | 47/100 [00:05<00:06,  8.63it/s]
 48%|██████████████████████████████████████▉                                          | 48/100 [00:05<00:07,  7.41it/s]
 50%|████████████████████████████████████████▌                                        | 50/100 [00:05<00:05,  8.55it/s]
 51%|█████████████████████████████████████████▎                                       | 51/100 [00:05<00:05,  8.48it/s]
 52%|██████████████████████████████████████████                                       | 52/100 [00:05<00:05,  8.58it/s]
 54%|███████████████████████████████████████████▋                                     | 54/100 [00:05<00:05,  8.80it/s]
 56%|█████████████████████████████████████████████▎                                   | 56/100 [00:06<00:04,  9.23it/s]
 58%|██████████████████████████████████████████████▉                                  | 58/100 [00:06<00:04,  9.62it/s]
 59%|███████████████████████████████████████████████▊                                 | 59/100 [00:06<00:04,  9.18it/s]
 60%|████████████████████████████████████████████████▌                                | 60/100 [00:06<00:04,  8.71it/s]
 61%|█████████████████████████████████████████████████▍                               | 61/100 [00:06<00:04,  8.51it/s]
 62%|██████████████████████████████████████████████████▏                              | 62/100 [00:06<00:04,  7.88it/s]
 63%|███████████████████████████████████████████████████                              | 63/100 [00:07<00:04,  7.62it/s]
 65%|████████████████████████████████████████████████████▋                            | 65/100 [00:07<00:04,  8.11it/s]
 66%|█████████████████████████████████████████████████████▍                           | 66/100 [00:07<00:04,  7.93it/s]
 68%|███████████████████████████████████████████████████████                          | 68/100 [00:07<00:03,  8.07it/s]
 69%|███████████████████████████████████████████████████████▉                         | 69/100 [00:07<00:03,  8.32it/s]
 70%|████████████████████████████████████████████████████████▋                        | 70/100 [00:07<00:03,  8.71it/s]
 72%|██████████████████████████████████████████████████████████▎                      | 72/100 [00:08<00:03,  9.32it/s]
 73%|███████████████████████████████████████████████████████████▏                     | 73/100 [00:08<00:02,  9.11it/s]
 74%|███████████████████████████████████████████████████████████▉                     | 74/100 [00:08<00:02,  8.95it/s]
 76%|█████████████████████████████████████████████████████████████▌                   | 76/100 [00:08<00:02,  9.10it/s]
 77%|██████████████████████████████████████████████████████████████▎                  | 77/100 [00:08<00:02,  7.99it/s]
 78%|███████████████████████████████████████████████████████████████▏                 | 78/100 [00:08<00:02,  7.99it/s]
 79%|███████████████████████████████████████████████████████████████▉                 | 79/100 [00:08<00:02,  7.71it/s]
 80%|████████████████████████████████████████████████████████████████▊                | 80/100 [00:08<00:02,  8.01it/s]
 82%|██████████████████████████████████████████████████████████████████▍              | 82/100 [00:09<00:02,  8.41it/s]
 84%|████████████████████████████████████████████████████████████████████             | 84/100 [00:09<00:01,  9.01it/s]
 85%|████████████████████████████████████████████████████████████████████▊            | 85/100 [00:09<00:01,  8.54it/s]
 86%|█████████████████████████████████████████████████████████████████████▋           | 86/100 [00:09<00:01,  7.75it/s]
 87%|██████████████████████████████████████████████████████████████████████▍          | 87/100 [00:09<00:01,  7.76it/s]
 88%|███████████████████████████████████████████████████████████████████████▎         | 88/100 [00:09<00:01,  7.62it/s]
 89%|████████████████████████████████████████████████████████████████████████         | 89/100 [00:10<00:01,  8.11it/s]
 90%|████████████████████████████████████████████████████████████████████████▉        | 90/100 [00:10<00:01,  7.82it/s]
 92%|██████████████████████████████████████████████████████████████████████████▌      | 92/100 [00:10<00:00,  8.60it/s]
 94%|████████████████████████████████████████████████████████████████████████████▏    | 94/100 [00:10<00:00,  9.09it/s]
 95%|████████████████████████████████████████████████████████████████████████████▉    | 95/100 [00:10<00:00,  8.77it/s]
 96%|█████████████████████████████████████████████████████████████████████████████▊   | 96/100 [00:10<00:00,  8.76it/s]
 97%|██████████████████████████████████████████████████████████████████████████████▌  | 97/100 [00:10<00:00,  8.37it/s]
 99%|████████████████████████████████████████████████████████████████████████████████▏| 99/100 [00:11<00:00,  8.71it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:11<00:00,  8.87it/s]

  0%|                                                                                          | 0/100 [00:00<?, ?it/s]
  1%|▊                                                                                 | 1/100 [00:02<04:11,  2.54s/it]
  2%|█▋                                                                                | 2/100 [00:04<04:03,  2.49s/it]
  3%|██▍                                                                               | 3/100 [00:07<03:55,  2.43s/it]
  4%|███▎                                                                              | 4/100 [00:09<03:47,  2.37s/it]
  5%|████                                                                              | 5/100 [00:11<03:45,  2.37s/it]
  6%|████▉                                                                             | 6/100 [00:14<03:44,  2.39s/it]
  7%|█████▋                                                                            | 7/100 [00:16<03:45,  2.43s/it]
  8%|██████▌                                                                           | 8/100 [00:19<03:46,  2.46s/it]
  9%|███████▍                                                                          | 9/100 [00:21<03:39,  2.41s/it]
 10%|████████                                                                         | 10/100 [00:24<03:39,  2.44s/it]
 11%|████████▉                                                                        | 11/100 [00:26<03:32,  2.39s/it]
 12%|█████████▋                                                                       | 12/100 [00:28<03:29,  2.38s/it]
 13%|██████████▌                                                                      | 13/100 [00:31<03:27,  2.38s/it]
 14%|███████████▎                                                                     | 14/100 [00:33<03:25,  2.39s/it]
 15%|████████████▏                                                                    | 15/100 [00:35<03:23,  2.39s/it]
 16%|████████████▉                                                                    | 16/100 [00:38<03:16,  2.34s/it]
 17%|█████████████▊                                                                   | 17/100 [00:40<03:18,  2.39s/it]
 18%|██████████████▌                                                                  | 18/100 [00:43<03:17,  2.41s/it]
 19%|███████████████▍                                                                 | 19/100 [00:45<03:12,  2.38s/it]
 20%|████████████████▏                                                                | 20/100 [00:47<03:08,  2.36s/it]
 21%|█████████████████                                                                | 21/100 [00:50<03:12,  2.43s/it]
 22%|█████████████████▊                                                               | 22/100 [00:52<03:05,  2.38s/it]
 23%|██████████████████▋                                                              | 23/100 [00:54<03:01,  2.35s/it]
 24%|███████████████████▍                                                             | 24/100 [00:57<02:56,  2.32s/it]
 25%|████████████████████▎                                                            | 25/100 [00:59<02:51,  2.29s/it]
 26%|█████████████████████                                                            | 26/100 [01:01<02:52,  2.33s/it]
 27%|█████████████████████▊                                                           | 27/100 [01:04<02:51,  2.35s/it]
 28%|██████████████████████▋                                                          | 28/100 [01:06<02:46,  2.31s/it]
 29%|███████████████████████▍                                                         | 29/100 [01:08<02:46,  2.35s/it]
 30%|████████████████████████▎                                                        | 30/100 [01:11<02:43,  2.34s/it]
 31%|█████████████████████████                                                        | 31/100 [01:13<02:42,  2.36s/it]
 32%|█████████████████████████▉                                                       | 32/100 [01:15<02:41,  2.37s/it]
 33%|██████████████████████████▋                                                      | 33/100 [01:18<02:37,  2.35s/it]
 34%|███████████████████████████▌                                                     | 34/100 [01:20<02:33,  2.33s/it]
 35%|████████████████████████████▎                                                    | 35/100 [01:22<02:33,  2.37s/it]
 36%|█████████████████████████████▏                                                   | 36/100 [01:25<02:34,  2.42s/it]
 37%|█████████████████████████████▉                                                   | 37/100 [01:27<02:29,  2.37s/it]
 38%|██████████████████████████████▊                                                  | 38/100 [01:30<02:27,  2.37s/it]
 39%|███████████████████████████████▌                                                 | 39/100 [01:32<02:23,  2.35s/it]
 40%|████████████████████████████████▍                                                | 40/100 [01:34<02:19,  2.33s/it]
 41%|█████████████████████████████████▏                                               | 41/100 [01:36<02:16,  2.31s/it]
 42%|██████████████████████████████████                                               | 42/100 [01:39<02:14,  2.31s/it]
 43%|██████████████████████████████████▊                                              | 43/100 [01:41<02:13,  2.34s/it]
 44%|███████████████████████████████████▋                                             | 44/100 [01:44<02:11,  2.35s/it]
 45%|████████████████████████████████████▍                                            | 45/100 [01:46<02:07,  2.33s/it]
 46%|█████████████████████████████████████▎                                           | 46/100 [01:48<02:06,  2.35s/it]
 47%|██████████████████████████████████████                                           | 47/100 [01:51<02:03,  2.33s/it]
 48%|██████████████████████████████████████▉                                          | 48/100 [01:53<02:00,  2.31s/it]
 49%|███████████████████████████████████████▋                                         | 49/100 [01:55<01:56,  2.29s/it]
 50%|████████████████████████████████████████▌                                        | 50/100 [01:57<01:54,  2.29s/it]
 51%|█████████████████████████████████████████▎                                       | 51/100 [02:00<01:53,  2.32s/it]
 52%|██████████████████████████████████████████                                       | 52/100 [02:02<01:50,  2.30s/it]
 53%|██████████████████████████████████████████▉                                      | 53/100 [02:04<01:48,  2.31s/it]
 54%|███████████████████████████████████████████▋                                     | 54/100 [02:07<01:46,  2.31s/it]
 55%|████████████████████████████████████████████▌                                    | 55/100 [02:09<01:43,  2.30s/it]
 56%|█████████████████████████████████████████████▎                                   | 56/100 [02:11<01:39,  2.27s/it]
 57%|██████████████████████████████████████████████▏                                  | 57/100 [02:13<01:38,  2.29s/it]
 58%|██████████████████████████████████████████████▉                                  | 58/100 [02:16<01:35,  2.28s/it]
 59%|███████████████████████████████████████████████▊                                 | 59/100 [02:18<01:34,  2.29s/it]
 60%|████████████████████████████████████████████████▌                                | 60/100 [02:20<01:31,  2.30s/it]
 61%|█████████████████████████████████████████████████▍                               | 61/100 [02:23<01:29,  2.30s/it]
 62%|██████████████████████████████████████████████████▏                              | 62/100 [02:25<01:28,  2.33s/it]
 63%|███████████████████████████████████████████████████                              | 63/100 [02:27<01:26,  2.34s/it]
 64%|███████████████████████████████████████████████████▊                             | 64/100 [02:30<01:24,  2.36s/it]
 65%|████████████████████████████████████████████████████▋                            | 65/100 [02:32<01:20,  2.31s/it]
 66%|█████████████████████████████████████████████████████▍                           | 66/100 [02:34<01:18,  2.31s/it]
 67%|██████████████████████████████████████████████████████▎                          | 67/100 [02:37<01:15,  2.30s/it]
 68%|███████████████████████████████████████████████████████                          | 68/100 [02:39<01:15,  2.36s/it]
 69%|███████████████████████████████████████████████████████▉                         | 69/100 [02:41<01:12,  2.34s/it]
 70%|████████████████████████████████████████████████████████▋                        | 70/100 [02:44<01:09,  2.32s/it]
 71%|█████████████████████████████████████████████████████████▌                       | 71/100 [02:46<01:06,  2.31s/it]
 72%|██████████████████████████████████████████████████████████▎                      | 72/100 [02:48<01:05,  2.32s/it]
 73%|███████████████████████████████████████████████████████████▏                     | 73/100 [02:51<01:04,  2.39s/it]
 74%|███████████████████████████████████████████████████████████▉                     | 74/100 [02:53<01:02,  2.42s/it]
 75%|████████████████████████████████████████████████████████████▊                    | 75/100 [02:56<00:59,  2.39s/it]
 76%|█████████████████████████████████████████████████████████████▌                   | 76/100 [02:58<00:55,  2.32s/it]
 77%|██████████████████████████████████████████████████████████████▎                  | 77/100 [03:00<00:52,  2.30s/it]
 78%|███████████████████████████████████████████████████████████████▏                 | 78/100 [03:02<00:51,  2.34s/it]
 79%|███████████████████████████████████████████████████████████████▉                 | 79/100 [03:05<00:50,  2.39s/it]
 80%|████████████████████████████████████████████████████████████████▊                | 80/100 [03:07<00:47,  2.38s/it]
 81%|█████████████████████████████████████████████████████████████████▌               | 81/100 [03:10<00:44,  2.34s/it]
 82%|██████████████████████████████████████████████████████████████████▍              | 82/100 [03:12<00:41,  2.30s/it]
 83%|███████████████████████████████████████████████████████████████████▏             | 83/100 [03:14<00:38,  2.29s/it]
 84%|████████████████████████████████████████████████████████████████████             | 84/100 [03:16<00:37,  2.34s/it]
 85%|████████████████████████████████████████████████████████████████████▊            | 85/100 [03:19<00:35,  2.34s/it]
 86%|█████████████████████████████████████████████████████████████████████▋           | 86/100 [03:21<00:32,  2.35s/it]
 87%|██████████████████████████████████████████████████████████████████████▍          | 87/100 [03:23<00:30,  2.31s/it]
 88%|███████████████████████████████████████████████████████████████████████▎         | 88/100 [03:26<00:28,  2.37s/it]
 89%|████████████████████████████████████████████████████████████████████████         | 89/100 [03:28<00:25,  2.35s/it]
 90%|████████████████████████████████████████████████████████████████████████▉        | 90/100 [03:31<00:23,  2.37s/it]
 91%|█████████████████████████████████████████████████████████████████████████▋       | 91/100 [03:33<00:21,  2.40s/it]
 92%|██████████████████████████████████████████████████████████████████████████▌      | 92/100 [03:36<00:19,  2.41s/it]
 93%|███████████████████████████████████████████████████████████████████████████▎     | 93/100 [03:38<00:16,  2.38s/it]
 94%|████████████████████████████████████████████████████████████████████████████▏    | 94/100 [03:40<00:14,  2.37s/it]
 95%|████████████████████████████████████████████████████████████████████████████▉    | 95/100 [03:43<00:11,  2.37s/it]
 96%|█████████████████████████████████████████████████████████████████████████████▊   | 96/100 [03:45<00:09,  2.38s/it]
 97%|██████████████████████████████████████████████████████████████████████████████▌  | 97/100 [03:47<00:07,  2.38s/it]
 98%|███████████████████████████████████████████████████████████████████████████████▍ | 98/100 [03:50<00:04,  2.39s/it]
 99%|████████████████████████████████████████████████████████████████████████████████▏| 99/100 [03:52<00:02,  2.36s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [03:54<00:00,  2.35s/it]

get cluster labels per cre line from Agglomerative clustering on co-clustering matrix

In [135]:
cluster_labels = processing.get_cluster_labels(coclustering_matrices, cell_metadata, n_clusters_cre, save_dir, load=False)
cluster_labels.head()
generating cluster labels from coclustering matrix
saving cluster_labels to \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4\24_events_all_L2_optimize_by_session\220310_across_session_norm\cluster_labels_Vip_12_Sst_6_Slc17a7_10.h5
Out[135]:
labels cell_specimen_id cre_line cluster_id
0 0 1086492406 Vip-IRES-Cre 3
1 4 1086492307 Vip-IRES-Cre 5
2 0 1086492221 Vip-IRES-Cre 3
3 6 1086492174 Vip-IRES-Cre 7
4 8 1086491936 Vip-IRES-Cre 2

merge cluster labels with cell metadata, remove small clusters, and add manual sort order

In [137]:
cluster_labels.cluster_id.unique()
Out[137]:
array([ 3,  5,  7,  2,  8,  1, 11,  6,  9, 10,  4, 12], dtype=int64)
In [140]:
cluster_meta = processing.get_cluster_meta(cluster_labels, cell_metadata, feature_matrix, n_clusters_cre, save_dir, load=False)
cluster_meta.head()
generating cluster_meta
0 cells dropped total
adding within cluster correlation to cluster_meta
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:2559: RuntimeWarning:

invalid value encountered in true_divide

C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:2560: RuntimeWarning:

invalid value encountered in true_divide

saving cluster_meta to \\allen\programs\braintv\workgroups\nc-ophys\visual_behavior\platform_paper_plots\figure_4\24_events_all_L2_optimize_by_session\220310_across_session_norm\cluster_metadata_Vip_12_Sst_6_Slc17a7_10.h5
C:\Users\marinag\AppData\Roaming\Python\Python37\site-packages\pandas\core\generic.py:2530: PerformanceWarning:


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block3_values] [items->['equipment_name', 'full_genotype', 'reporter_line', 'driver_line', 'sex', 'cre_line', 'indicator', 'project_code', 'targeted_structure', 'date_of_acquisition', 'session_type', 'experience_level', 'image_set', 'cell_type', 'area_depth', 'area_binned_depth', 'layer', 'experience_exposure', 'manual_sort_order']]


Out[140]:
cluster_id labels ophys_experiment_id equipment_name full_genotype mouse_id reporter_line driver_line sex age_in_days ... first_novel n_relative_to_first_novel last_familiar last_familiar_active second_novel second_novel_active experience_exposure original_cluster_id manual_sort_order within_cluster_correlation
cell_specimen_id
1086492406 3 0 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 3 4 0.449441
1086492307 5 4 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 5 7 0.962579
1086492221 3 0 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 3 4 0.761867
1086492174 7 6 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 7 3 0.874636
1086491936 2 8 795073741 CAM2P.4 Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt 412036 Ai148(TIT2L-GC6f-ICL-tTA2) [Vip-IRES-Cre] F 151.0 ... False -1.0 True True False False Familiar 3 2 2 0.376984

5 rows × 45 columns

In [141]:
cluster_meta[['labels', 'cluster_id', 'original_cluster_id', 'manual_sort_order', 'within_cluster_correlation']].head()
Out[141]:
labels cluster_id original_cluster_id manual_sort_order within_cluster_correlation
cell_specimen_id
1086492406 0 3 3 4 0.449441
1086492307 4 5 5 7 0.962579
1086492221 0 3 3 4 0.761867
1086492174 6 7 7 3 0.874636
1086491936 8 2 2 2 0.376984
In [ ]:
 

plot co-clustering matrix and dendrograms sorted by linkage matrix

In [142]:
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
In [ ]:
# for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
#     plotting.plot_coclustering_matrix_and_dendrograms(coclustering_matrices, cre_line, cluster_meta, n_clusters_cre, 
#                                                      save_dir=base_dir, folder=folder)
In [ ]:
# data = coclustering_matrices[cre_line]

# fig, ax = plt.subplots(figsize=(8,8))
# ax = sns.clustermap(data=data, cmap='Greys', cbar_kws={'label':'co-clustering probability'})

sort coclustering matrix by cluster ID / cluster size

In [190]:
cre_lines = processing.get_cre_lines(cell_metadata)
for cre_line in cre_lines: 
    plotting.plot_coclustering_matrix_sorted_by_cluster_size(coclustering_matrices, cluster_meta, cre_line, 
                                                    save_dir=base_dir, folder=folder, ax=None)

plot average dropout scores for each cluster

plot each cluster separately and save to single cell examples dir

In [ ]:
cell_examples_dir = os.path.join(save_dir, 'matched_cell_examples')
if not os.path.exists(cell_examples_dir):
    os.mkdir(cell_examples_dir)
In [146]:
plotting.plot_dropout_heatmaps_and_save_to_cell_examples_folders(cluster_meta, feature_matrix, save_dir)
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\plotting.py:539: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

plot average dropouts for each cre line in cluster size order

In [147]:
sort_col = 'cluster_id'

plotting.plot_dropout_heatmaps_for_clusters(cluster_meta, feature_matrix, sort_col=sort_col, save_dir=base_dir, folder=folder)

plot dropout heatmaps in manually sorted order

In [148]:
# sort_col = 'manual_sort_order'

# plotting.plot_dropout_heatmaps_for_clusters(cluster_meta, feature_matrix, sort_col=sort_col, save_dir=base_dir, folder=folder)
In [ ]:
 

plot feature matrix sorted by cluster ID

In [149]:
plotting.plot_feature_matrix_sorted(feature_matrix, cluster_meta, sort_col='cluster_id', save_dir=base_dir, folder=folder)
    
In [150]:
# plotting.plot_feature_matrix_sorted(feature_matrix, cluster_meta, sort_col='manual_sort_order', save_dir=base_dir, folder=folder)
    

umap with cluster labels

In [151]:
# for column in ['project_code', 'binned_depth', 'targeted_structure', 'mouse_id']:
label_col = 'cluster_id'

plotting.plot_umap_for_clusters(cluster_meta, feature_matrix, label_col=label_col, save_dir=base_dir, folder=folder)

Correlations within clusters

In [152]:
plotting.plot_within_cluster_correlations_all_cre(cluster_meta, n_clusters_cre, sort_order=None, save_dir=base_dir, folder=folder)
In [ ]:
# sort_order = processing.get_manual_sort_order()

# plotting.plot_within_cluster_correlations_all_cre(cluster_meta, n_clusters_cre, sort_order=sort_order, 
#                                                   suffix='_manual_sort', save_dir=base_dir, folder=folder)

average dropouts per cre line

In [153]:
plotting.plot_average_dropout_heatmap_for_cre_lines(dropouts, cluster_meta, save_dir=base_dir, folder=folder)

plot 100 cells per cluster to examine within cluster variability

In [157]:
plotting.plot_random_subset_of_cells_per_cluster(cluster_meta, dropouts, save_dir)
20 cells in cluster 10
selecting a random subset of 20
56 cells in cluster 1
selecting a random subset of 56
49 cells in cluster 3
selecting a random subset of 49
32 cells in cluster 6
selecting a random subset of 32
32 cells in cluster 7
selecting a random subset of 32
54 cells in cluster 2
selecting a random subset of 54
25 cells in cluster 9
selecting a random subset of 25
42 cells in cluster 4
selecting a random subset of 42
36 cells in cluster 5
selecting a random subset of 36
28 cells in cluster 8
selecting a random subset of 28
15 cells in cluster 11
selecting a random subset of 15
11 cells in cluster 12
selecting a random subset of 11
76 cells in cluster 1
selecting a random subset of 76
49 cells in cluster 2
selecting a random subset of 49
27 cells in cluster 4
selecting a random subset of 27
32 cells in cluster 3
selecting a random subset of 32
6 cells in cluster 6
selecting a random subset of 6
10 cells in cluster 5
selecting a random subset of 10
397 cells in cluster 3
selecting a random subset of 100
318 cells in cluster 4
selecting a random subset of 100
730 cells in cluster 1
selecting a random subset of 100
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\plotting.py:1106: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-157-ffa557462157> in <module>
----> 1 plotting.plot_random_subset_of_cells_per_cluster(cluster_meta, dropouts, save_dir)

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\plotting.py in plot_random_subset_of_cells_per_cluster(cluster_meta, dropouts, save_dir)
   1112                 cell_info = cluster_meta.loc[cell_specimen_id]['targeted_structure'] + '_' + str(cluster_meta.loc[cell_specimen_id]['imaging_depth'])
   1113                 mean_dropouts = dropouts.loc[cell_specimen_id].groupby('experience_level').mean()[features]
-> 1114                 ax[i] = sns.heatmap(mean_dropouts.T, cmap='Blues', vmin=0, vmax=1, ax=ax[i], cbar=False)#cbar_kws={'shrink':0.7, 'label':model_output_type})
   1115                 ax[i].set_title(str(cell_specimen_id)+'_'+cell_info, fontsize=10)
   1116                 ax[i].set_ylim(0,4)

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\seaborn\matrix.py in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, xticklabels, yticklabels, mask, ax, **kwargs)
    551     if square:
    552         ax.set_aspect("equal")
--> 553     plotter.plot(ax, cbar_ax, kwargs)
    554     return ax
    555 

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\seaborn\matrix.py in plot(self, ax, cax, kws)
    334 
    335         # Possibly rotate them if they overlap
--> 336         _draw_figure(ax.figure)
    337 
    338         if axis_ticklabels_overlap(xtl):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\seaborn\utils.py in _draw_figure(fig)
     93     """Force draw of a matplotlib figure, accounting for back-compat."""
     94     # See https://github.com/matplotlib/matplotlib/issues/19197 for context
---> 95     fig.canvas.draw()
     96     if fig.stale:
     97         try:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self)
    386         self.renderer = self.get_renderer(cleared=True)
    387         with RendererAgg.lock:
--> 388             self.figure.draw(self.renderer)
    389             # A GUI class may be need to update a window using this draw, so
    390             # don't forget to call the superclass.

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\figure.py in draw(self, renderer)
   1707             self.patch.draw(renderer)
   1708             mimage._draw_list_compositing_images(
-> 1709                 renderer, self, artists, self.suppressComposite)
   1710 
   1711             renderer.close_group('figure')

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
    133     if not_composite or not has_images:
    134         for a in artists:
--> 135             a.draw(renderer)
    136     else:
    137         # Composite any adjacent images together

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axes\_base.py in draw(self, renderer, inframe)
   2605                 artists.remove(spine)
   2606 
-> 2607         self._update_title_position(renderer)
   2608 
   2609         if not self.axison or inframe:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axes\_base.py in _update_title_position(self, renderer)
   2554                     # this happens for an empty bb
   2555                     y = 1
-> 2556             if title.get_window_extent(renderer).ymin < top:
   2557                 y = self.transAxes.inverted().transform(
   2558                         (0., top))[1]

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\text.py in get_window_extent(self, renderer, dpi)
    878             self.figure.dpi = dpi
    879         if self.get_text() == '':
--> 880             tx, ty = self._get_xy_display()
    881             return Bbox.from_bounds(tx, ty, 0, 0)
    882 

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\text.py in _get_xy_display(self)
    222         """
    223         x, y = self.get_unitless_position()
--> 224         return self.get_transform().transform_point((x, y))
    225 
    226     def _get_multialignment(self):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in transform_point(self, point)
   1479         if len(point) != self.input_dims:
   1480             raise ValueError("The length of 'point' must be 'self.input_dims'")
-> 1481         return self.transform(np.asarray([point]))[0]
   1482 
   1483     def transform_path(self, path):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in transform(self, values)
   1392 
   1393         # Transform the values
-> 1394         res = self.transform_affine(self.transform_non_affine(values))
   1395 
   1396         # Convert the result back to the shape of the input values.

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in transform_affine(self, points)
   2371     def transform_affine(self, points):
   2372         # docstring inherited
-> 2373         return self.get_affine().transform(points)
   2374 
   2375     def transform_non_affine(self, points):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in get_affine(self)
   2399         else:
   2400             return Affine2D(np.dot(self._b.get_affine().get_matrix(),
-> 2401                                 self._a.get_affine().get_matrix()))
   2402 
   2403     def inverted(self):

<__array_function__ internals> in dot(*args, **kwargs)

KeyboardInterrupt: 
Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x0000025A6F7490D0> (for post_execute):
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\pyplot.py in post_execute()
    107             def post_execute():
    108                 if matplotlib.is_interactive():
--> 109                     draw_all()
    110 
    111             # IPython >= 2

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\_pylab_helpers.py in draw_all(cls, force)
    126         for f_mgr in cls.get_all_fig_managers():
    127             if force or f_mgr.canvas.figure.stale:
--> 128                 f_mgr.canvas.draw_idle()
    129 
    130 atexit.register(Gcf.destroy_all)

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\backend_bases.py in draw_idle(self, *args, **kwargs)
   1905         if not self._is_idle_drawing:
   1906             with self._idle_draw_cntx():
-> 1907                 self.draw(*args, **kwargs)
   1908 
   1909     def draw_cursor(self, event):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self)
    386         self.renderer = self.get_renderer(cleared=True)
    387         with RendererAgg.lock:
--> 388             self.figure.draw(self.renderer)
    389             # A GUI class may be need to update a window using this draw, so
    390             # don't forget to call the superclass.

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\figure.py in draw(self, renderer)
   1707             self.patch.draw(renderer)
   1708             mimage._draw_list_compositing_images(
-> 1709                 renderer, self, artists, self.suppressComposite)
   1710 
   1711             renderer.close_group('figure')

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
    133     if not_composite or not has_images:
    134         for a in artists:
--> 135             a.draw(renderer)
    136     else:
    137         # Composite any adjacent images together

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axes\_base.py in draw(self, renderer, inframe)
   2645             renderer.stop_rasterizing()
   2646 
-> 2647         mimage._draw_list_compositing_images(renderer, self, artists)
   2648 
   2649         renderer.close_group('axes')

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
    133     if not_composite or not has_images:
    134         for a in artists:
--> 135             a.draw(renderer)
    136     else:
    137         # Composite any adjacent images together

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axis.py in draw(self, renderer, *args, **kwargs)
   1206 
   1207         for tick in ticks_to_draw:
-> 1208             tick.draw(renderer)
   1209 
   1210         # scale up the axis label box to also find the neighbors, not

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axis.py in draw(self, renderer)
    295         for artist in [self.gridline, self.tick1line, self.tick2line,
    296                        self.label1, self.label2]:
--> 297             artist.draw(renderer)
    298         renderer.close_group(self.__name__)
    299         self.stale = False

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\lines.py in draw(self, renderer)
    864                 else:
    865                     # Don't scale for pixels, and don't stroke them
--> 866                     marker_trans = marker_trans.scale(w)
    867                 renderer.draw_markers(gc, marker_path, marker_trans,
    868                                       subsampled, affine.frozen(),

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in scale(self, sx, sy)
   1989         scale_mtx = np.array(
   1990             [[sx, 0.0, 0.0], [0.0, sy, 0.0], [0.0, 0.0, 1.0]], float)
-> 1991         self._mtx = np.dot(scale_mtx, self._mtx)
   1992         self.invalidate()
   1993         return self

<__array_function__ internals> in dot(*args, **kwargs)

KeyboardInterrupt: 
Error in callback <function flush_figures at 0x0000025A6F7A29D8> (for post_execute):
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\ipykernel\pylab\backend_inline.py in flush_figures()
    115         # ignore the tracking, just draw and close all figures
    116         try:
--> 117             return show(True)
    118         except Exception as e:
    119             # safely show traceback if in IPython, else raise

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\ipykernel\pylab\backend_inline.py in show(close, block)
     37             display(
     38                 figure_manager.canvas.figure,
---> 39                 metadata=_fetch_figure_metadata(figure_manager.canvas.figure)
     40             )
     41     finally:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\core\display.py in display(include, exclude, metadata, transient, display_id, *objs, **kwargs)
    304             publish_display_data(data=obj, metadata=metadata, **kwargs)
    305         else:
--> 306             format_dict, md_dict = format(obj, include=include, exclude=exclude)
    307             if not format_dict:
    308                 # nothing to display (e.g. _ipython_display_ took over)

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\core\formatters.py in format(self, obj, include, exclude)
    178             md = None
    179             try:
--> 180                 data = formatter(obj)
    181             except:
    182                 # FIXME: log the exception

<C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\decorator.py:decorator-gen-9> in __call__(self, obj)

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\core\formatters.py in catch_format_error(method, self, *args, **kwargs)
    222     """show traceback on failed format call"""
    223     try:
--> 224         r = method(self, *args, **kwargs)
    225     except NotImplementedError:
    226         # don't warn on NotImplementedErrors

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
    339                 pass
    340             else:
--> 341                 return printer(obj)
    342             # Finally look for special method names
    343             method = get_real_method(obj, self.print_method)

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\core\pylabtools.py in <lambda>(fig)
    242 
    243     if 'png' in formats:
--> 244         png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
    245     if 'retina' in formats or 'png2x' in formats:
    246         png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\IPython\core\pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs)
    126 
    127     bytes_io = BytesIO()
--> 128     fig.canvas.print_figure(bytes_io, **kw)
    129     data = bytes_io.getvalue()
    130     if fmt == 'svg':

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs)
   2054                         orientation=orientation,
   2055                         dryrun=True,
-> 2056                         **kwargs)
   2057                     renderer = self.figure._cachedRenderer
   2058                     bbox_artists = kwargs.pop("bbox_extra_artists", None)

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\backends\backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args, **kwargs)
    525 
    526         else:
--> 527             FigureCanvasAgg.draw(self)
    528             renderer = self.get_renderer()
    529             with cbook._setattr_cm(renderer, dpi=self.figure.dpi), \

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self)
    386         self.renderer = self.get_renderer(cleared=True)
    387         with RendererAgg.lock:
--> 388             self.figure.draw(self.renderer)
    389             # A GUI class may be need to update a window using this draw, so
    390             # don't forget to call the superclass.

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\figure.py in draw(self, renderer)
   1707             self.patch.draw(renderer)
   1708             mimage._draw_list_compositing_images(
-> 1709                 renderer, self, artists, self.suppressComposite)
   1710 
   1711             renderer.close_group('figure')

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite)
    133     if not_composite or not has_images:
    134         for a in artists:
--> 135             a.draw(renderer)
    136     else:
    137         # Composite any adjacent images together

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs)
     36                 renderer.start_filter()
     37 
---> 38             return draw(artist, renderer, *args, **kwargs)
     39         finally:
     40             if artist.get_agg_filter() is not None:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axes\_base.py in draw(self, renderer, inframe)
   2605                 artists.remove(spine)
   2606 
-> 2607         self._update_title_position(renderer)
   2608 
   2609         if not self.axison or inframe:

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\axes\_base.py in _update_title_position(self, renderer)
   2554                     # this happens for an empty bb
   2555                     y = 1
-> 2556             if title.get_window_extent(renderer).ymin < top:
   2557                 y = self.transAxes.inverted().transform(
   2558                         (0., top))[1]

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\text.py in get_window_extent(self, renderer, dpi)
    878             self.figure.dpi = dpi
    879         if self.get_text() == '':
--> 880             tx, ty = self._get_xy_display()
    881             return Bbox.from_bounds(tx, ty, 0, 0)
    882 

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\text.py in _get_xy_display(self)
    222         """
    223         x, y = self.get_unitless_position()
--> 224         return self.get_transform().transform_point((x, y))
    225 
    226     def _get_multialignment(self):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in transform_point(self, point)
   1479         if len(point) != self.input_dims:
   1480             raise ValueError("The length of 'point' must be 'self.input_dims'")
-> 1481         return self.transform(np.asarray([point]))[0]
   1482 
   1483     def transform_path(self, path):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in transform(self, values)
   1392 
   1393         # Transform the values
-> 1394         res = self.transform_affine(self.transform_non_affine(values))
   1395 
   1396         # Convert the result back to the shape of the input values.

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in transform_affine(self, points)
   2371     def transform_affine(self, points):
   2372         # docstring inherited
-> 2373         return self.get_affine().transform(points)
   2374 
   2375     def transform_non_affine(self, points):

~\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\transforms.py in get_affine(self)
   2399         else:
   2400             return Affine2D(np.dot(self._b.get_affine().get_matrix(),
-> 2401                                 self._a.get_affine().get_matrix()))
   2402 
   2403     def inverted(self):

<__array_function__ internals> in dot(*args, **kwargs)

KeyboardInterrupt: 

breakdown by area and depth

We are going to normalize within each area or depth to account for the large imbalance in N due to Scientifica datasets only being performed in V1 at certain depths, as well as biological variation in cell type specific expression by depth

plot fraction cells by area and depth

In [158]:
label = 'fraction of cells'
plotting.plot_fraction_cells_by_area_depth(cluster_meta, n_clusters_cre, normalize=True, label=label, 
                                           save_dir=base_dir, folder=folder)
In [159]:
label = '# of cells'
plotting.plot_fraction_cells_by_area_depth(cluster_meta, n_clusters_cre, normalize=False, label=label, 
                                           save_dir=base_dir, folder=folder)

compute % cells per area / depth relative to chance

to deal with uneven sampling across areas & depths, we will express the fraction of cells per cluster in each area & depth as a percentage relative to chance. We can compute the expected number of cells per area and depth in each cluster based on random sampling of our area/depth distribution, then compute how many cells are actually in each area and depth per cluster and express that as a % relative to chance

To compute the expected number of cells in each cluster based on sampling:

* take size of cluster (n_neurons) and select a random set of cells of that size
* repeat 100x to get a distribution of random cells
* compute the number of cells in each area and depth in the random distribution
* quantify how many cells are actually in each area and depth in the clustered data
* compute significance of actual # cells relative to random distrubution 
* report % cells relative to chance and p_value
In [160]:
cell_count_stats = processing.get_cell_count_stats(cluster_meta, conditions_to_groupby = ['targeted_structure', 'layer'])
In [161]:
cell_count_stats.head()[cell_count_stats.keys()[:16]]
Out[161]:
targeted_structure layer cre_line n_cells_total_cre n_cells_cond_cre fraction_per_cond_cre cluster_id n_cells_total_cluster n_cells_cond_cluster fraction_per_cond_cluster n_cells random_n_cells random_n_cells_mean fraction_of_random relative_to_random n_times_actual_greater_than_random
0 VISl lower Slc17a7-IRES2-Cre 3306 264 0.079855 1 730 48 0.065753 48 [69, 60, 62, 61, 57, 51, 54, 57, 60, 55, 58, 6... 59.12 0.811908 -0.188092 4.0
1 VISl upper Slc17a7-IRES2-Cre 3306 328 0.099214 1 730 130 0.178082 130 [80, 84, 67, 81, 67, 82, 69, 79, 69, 76, 70, 8... 71.76 1.811594 0.811594 100.0
2 VISp lower Slc17a7-IRES2-Cre 3306 1602 0.484574 1 730 200 0.273973 200 [350, 351, 355, 342, 356, 361, 345, 352, 360, ... 355.19 0.563079 -0.436921 0.0
3 VISp upper Slc17a7-IRES2-Cre 3306 1112 0.336358 1 730 352 0.482192 352 [231, 235, 246, 246, 250, 236, 262, 242, 241, ... 243.93 1.443037 0.443037 100.0
0 VISl lower Slc17a7-IRES2-Cre 3306 264 0.079855 2 616 40 0.064935 40 [42, 57, 59, 42, 44, 42, 41, 47, 52, 47, 50, 5... 49.10 0.814664 -0.185336 3.0
In [162]:
# save stats
cell_count_stats.to_csv(os.path.join(save_dir, 'cell_count_stats.csv'))

heatmap of % rel chance

fraction of cells relative to random distribution

In [163]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range'

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre, 
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=None, suffix=suffix, save_dir=base_dir, folder=folder)
In [164]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_full_range'

vmax = cell_count_stats.relative_to_random.max()
vmin = cell_count_stats.relative_to_random.min()

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre, 
                                             value_to_plot, cbar_label, cmap='PRGn', vmin=-vmax, vmax=vmax, 
                                             cluster_order=None, save_dir=base_dir, folder=folder, suffix=suffix)
In [165]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_full_range_manual_sort_order'

vmax = cell_count_stats.relative_to_random.max()
vmin = cell_count_stats.relative_to_random.min()

cluster_order = processing.get_manual_sort_order()

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre,
                                             value_to_plot, cbar_label, cmap='PRGn', vmin=-vmax, vmax=vmax, 
                                             cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)

sort clusters by the fraction of cells in VISp upper

In [166]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_full_range_VISp_upper_sort'

vmax = cell_count_stats.relative_to_random.max()
vmin = cell_count_stats.relative_to_random.min()

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, 
                                                                 location='VISp_upper', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-vmax, vmax=vmax, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)
In [167]:
value_to_plot = 'relative_to_random'
cbar_label = 'fraction cells\nrel. to chance'
suffix = '_limited_range_VISp_upper_sort'

cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, 
                                                                 location='VISp_upper', metric='relative_to_random')  

plotting.plot_cell_stats_per_cluster_for_areas_depths(cluster_meta, cell_count_stats, n_clusters_cre,
                                                      value_to_plot, cbar_label, cmap='PRGn', vmin=-1, vmax=1, 
                                                      cluster_order=cluster_order, save_dir=base_dir, folder=folder, suffix=suffix)

plot cluster heatmaps in this order

In [168]:
plotting.plot_dropout_heatmaps_for_clusters_sorted(cluster_meta, feature_matrix, cluster_order=cluster_order, 
                                       save_dir=base_dir, folder=folder, sort_type='VISp_upper_pct_rel_chance')

test plot components then plot clusters with other information as additional panels

plot clusters with % cells per area and depth

In [169]:
fraction_cells = processing.get_fraction_cells_per_area_depth(cluster_meta)
cre_fraction = fraction_cells[fraction_cells.cre_line==cre_line]
In [170]:
plotting.plot_fraction_cells_per_area_depth(cre_fraction, 1, ax=None)
Out[170]:
<matplotlib.axes._subplots.AxesSubplot at 0x25a4e517b38>

get relevant data and plot clusters with fractions

In [171]:
cell_count_stats = processing.get_cell_count_stats(cluster_meta)
cre_counts = cell_count_stats[cell_count_stats.cre_line==cre_line]
In [172]:
cluster_id = 1
plotting.plot_pct_rel_to_chance_for_cluster(cre_counts, cluster_id, ax=None)
Out[172]:
<matplotlib.axes._subplots.AxesSubplot at 0x25a4e3d89b0>

population average trace

In [173]:
# load dataframe with response traces
df_name = 'omission_response_df'
conditions = ['cell_specimen_id', 'omitted']

data_type = 'events'
event_type = 'omissions'
inclusion_criteria = 'active_only_closest_familiar_and_novel_containers_with_all_levels'


multi_session_df = loading.get_multi_session_df_for_conditions(data_type, event_type, conditions, inclusion_criteria)
print(len(multi_session_df.ophys_experiment_id.unique()))
there are 1885 experiments in the full multi_session_df
there are 1249 experiments in the multi_session_df after limiting to platform experiments
there are 402 experiments after filtering for inclusion criteria -  active_only_closest_familiar_and_novel_containers_with_all_levels
402
In [174]:
cluster_mdf = multi_session_df.merge(cluster_meta[['cluster_id']], 
                                     on='cell_specimen_id', how = 'inner')
In [175]:
plotting.plot_population_average_response_for_cluster(cluster_mdf, cre_line, cluster_id, change=False, omitted=True, ax=None)
Out[175]:
<matplotlib.axes._subplots.AxesSubplot at 0x25a4ed69e48>

plot clusters, fraction per area depth, and pop avgs as rows

In [176]:
# get fraction cells relative to chance per cluster per cre_line
cell_count_stats = processing.get_cell_count_stats(cluster_meta)
# get fraction of cells per area/depth per cluster per cre_line 
fraction_cells = processing.get_fraction_cells_per_area_depth(cluster_meta)
In [177]:
for cre_line in cre_lines: 
    plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
                                              cell_count_stats, fraction_cells, cre_line, 
                                              sort_order=None, suffix='_size_order', 
                                              save_dir=base_dir, folder=folder, )

use manual sort order

In [178]:
# manual_sort_order = processing.get_manual_sort_order()

# for cre_line in cre_lines: 
#     plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
#                                               cell_count_stats, fraction_cells, cre_line, 
#                                               sort_order=manual_sort_order, suffix='_manual_sort', 
#                                               save_dir=base_dir, folder=folder, )

sort by visp upper

In [209]:
# get fraction cells relative to chance per cluster per cre_line
cell_count_stats = processing.get_cell_count_stats(cluster_meta)
# get fraction of cells per area/depth per cluster per cre_line 
fraction_cells = processing.get_fraction_cells_per_area_depth(cluster_meta)
In [210]:
cell_count_stats['location'] = cell_count_stats.targeted_structure+'_'+cell_count_stats.layer
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  

for cre_line in cre_lines: 
    plotting.plot_clusters_stats_pop_avg_rows(cluster_meta, feature_matrix, multi_session_df,
                                              cell_count_stats, fraction_cells, cre_line, 
                                              sort_order=cluster_order, suffix='_VISp_upper_sort', 
                                              save_dir=base_dir, folder=folder, )

plot as columns

In [189]:
for cre_line in cre_lines: 
    plotting.plot_clusters_stats_pop_avg_cols(cluster_meta, feature_matrix, multi_session_df,
                                              cell_count_stats, fraction_cells, cre_line, 
                                              sort_order=cluster_order, suffix='_VISp_upper_sort', 
                                              save_dir=base_dir, folder=folder, )

within cluster correlations sorted by VISp upper

In [183]:
sort_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  

plotting.plot_within_cluster_correlations_all_cre(cluster_meta, n_clusters_cre, sort_order=sort_order, 
                                                  suffix='_VISp_upper_sort', save_dir=base_dir, folder=folder)
In [ ]:
 
In [ ]:
 
In [ ]:
 

Compute selectivity metrics on dropout scores

define stats

compute stats across cells

  • positive value of exp_mod_direction means stronger coding of pref feature in novel session
  • positive value of exp_mod_persistence means similar coding in Novel 1 and Novel >1 for pref feature
  • high value of feature_sel_within_session means highly feature selective in strongest exp level
  • low value of feature_sel_within_session means similar strength of coding for multiple features in a given session
  • high value of feature_sel_across sessions means there is a big difference between the strength of coding for the preferred feature in the preferred experience level and a different feature in a different experience level
  • low value of feature_sel_across sessions means similar coding across two different features in two different sessions
  • cell switching is indicated by low feature_sel_across_sessions and high experience selectivity
  • lack of coding overall would be low exp selectivity and low feature_sel_across_sessions
In [194]:
dropouts = dropouts.reset_index()
In [195]:
cell_stats = pd.DataFrame()
for i, cell_specimen_id in enumerate(cluster_meta.index.values):
    cell_dropouts = dropouts[dropouts.cell_specimen_id==cell_specimen_id].groupby('experience_level').mean()[features]
    stats = processing.get_coding_metrics(index_dropouts=cell_dropouts, index_value=cell_specimen_id, index_name='cell_specimen_id')
    cell_stats = pd.concat([cell_stats, stats], sort=False)
cell_stats = cell_stats.merge(cluster_meta, on='cell_specimen_id')
metrics = stats.keys()[-6:]
c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:926: RuntimeWarning:

invalid value encountered in double_scalars

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:929: RuntimeWarning:

divide by zero encountered in double_scalars

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:929: RuntimeWarning:

invalid value encountered in double_scalars

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:916: RuntimeWarning:

invalid value encountered in double_scalars

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:920: RuntimeWarning:

invalid value encountered in double_scalars

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:939: RuntimeWarning:

invalid value encountered in double_scalars

c:\users\marinag\documents\code\visual_behavior_analysis\visual_behavior\dimensionality_reduction\clustering\processing.py:952: RuntimeWarning:

invalid value encountered in double_scalars

average stats across cells per cluster

In [196]:
avg_cluster_stats = cell_stats.groupby(['cre_line', 'cluster_id']).mean()
avg_cluster_stats = avg_cluster_stats[list(metrics)]
avg_cluster_stats = avg_cluster_stats.reset_index()
n_cells = cell_stats.groupby(['cre_line', 'cluster_id']).count()[['labels']].rename(columns={'labels':'n_cells'})
avg_cluster_stats = avg_cluster_stats.merge(n_cells, on=['cre_line', 'cluster_id'])

compute stats on each cluster directly

In [197]:
cluster_stats = pd.DataFrame()
for cre_line in cre_lines:
    # get cell specimen ids for this cre line
    cre_cell_specimen_ids = cluster_meta[cluster_meta.cre_line==cre_line].index.values
    # get cluster labels dataframe for this cre line
    cre_cluster_ids = cluster_meta.loc[cre_cell_specimen_ids]
    # get unique cluster labels for this cre line
    cluster_labels = np.sort(cre_cluster_ids.cluster_id.unique())
    # limit dropouts df to cells in this cre line
    feature_matrix_cre = feature_matrix.loc[cre_cell_specimen_ids]
    for i, cluster_id in enumerate(cluster_labels):
        # get cell specimen ids in this cluster in this cre line
        this_cluster_csids = cre_cluster_ids[cre_cluster_ids['cluster_id'] == cluster_id].index.values
        # get dropout scores for cells in this cluster in this cre line
        mean_dropout_df = np.abs(feature_matrix_cre.loc[this_cluster_csids].mean().unstack())
        stats = processing.get_coding_metrics(index_dropouts=mean_dropout_df.T, index_value=cluster_id, index_name='cluster_id')
        fraction_cre = len(this_cluster_csids) / float(len(cre_cell_specimen_ids))
        stats['fraction_cre'] = fraction_cre
        stats['cre_line'] = cre_line
        stats['F_max'] = mean_dropout_df['Familiar'].max()
        stats['N1_max'] = mean_dropout_df['Novel 1'].max()
        stats['N2_max'] = mean_dropout_df['Novel >1'].max()
        cluster_stats = pd.concat([cluster_stats, stats])
cluster_stats = cluster_stats.reset_index()
n_cells = cell_stats.groupby(['cre_line', 'cluster_id']).count()[['labels']].rename(columns={'labels':'n_cells_cluster'})
cluster_stats = cluster_stats.merge(n_cells, on=['cre_line', 'cluster_id'])
In [ ]:
 

merge cluster stats with cell counts per cluster

In [198]:
# create location column merging area and depth
cell_count_stats['location'] = cell_count_stats.targeted_structure+'_'+cell_count_stats.layer
# group & unstack to get fraction relative to random for each location as columns
fraction_cells = cell_count_stats[['cre_line', 'cluster_id', 'location', 'relative_to_random']].groupby(['cre_line', 'cluster_id', 'location']).mean().unstack()
# get rid of multi-index column
fraction_cells.columns = fraction_cells.columns.droplevel()
# merge fraction cells per location with other cluster metrics
cluster_stats = fraction_cells.reset_index().merge(cluster_stats, on=['cre_line', 'cluster_id'])
In [199]:
cluster_stats.head()
Out[199]:
cre_line cluster_id VISl_lower VISl_upper VISp_lower VISp_upper dominant_feature dominant_experience_level next_highest_conditions feature_selectivity experience_selectivity exp_mod_direction exp_mod_persistence feature_sel_within_session feature_sel_across_sessions fraction_cre F_max N1_max N2_max n_cells_cluster
0 Slc17a7-IRES2-Cre 1 -0.184228 0.797069 -0.432092 0.427123 task Novel >1 (Familiar, all-images) 0.259593 0.401007 0.358913 1.721184 0.196154 0.161622 0.220811 0.006164 0.004962 0.008541 730
1 Slc17a7-IRES2-Cre 2 -0.181167 -0.086311 -0.078233 0.180609 all-images Novel 1 (Novel >1, task) 0.932991 0.876952 0.942771 0.101657 0.908842 0.943787 0.186328 0.026490 0.899264 0.091417 616
2 Slc17a7-IRES2-Cre 3 -0.278771 0.136651 0.009365 0.012601 all-images Novel 1 (Novel >1, task) 0.923989 0.297619 0.820990 0.984265 0.892202 0.902111 0.120085 0.076725 0.780486 0.768205 397
3 Slc17a7-IRES2-Cre 4 0.015228 -0.166132 0.037182 -0.008433 all-images Novel >1 (Novel 1, task) 0.937595 0.873003 0.468151 10.045562 0.916188 0.954869 0.096189 0.032399 0.089438 0.898452 318
4 Slc17a7-IRES2-Cre 5 0.416431 -0.467419 0.246046 -0.314425 all-images Familiar (Novel >1, task) 0.934515 0.873936 -0.864767 0.855276 0.915658 0.945945 0.096189 0.914823 0.066343 0.056741 318
In [ ]:
 

plot each relationship on its own fig

In [200]:
# for col_x in cluster_stats.columns[2:]:
#     for col_y in cluster_stats.columns[2:]:
#         try:
#             figsize=(4,4)
#             fig, ax = plt.subplots(figsize=figsize)
#             ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
#                                 hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
#             ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
#             ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
#             ax.get_legend().remove()
#             plt.axis("equal")
#             for i in range(len(cluster_stats[col_x].values)):
#                 xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#                 color_index = cluster_stats['color_index'].values[i]
#                 cluster_id = cluster_stats['cluster_id'].values[i]
#                 plt.annotate(cluster_id, xy, color=colors[color_index])
#             utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
#         except:
#             pass

barplots of metric values for clusters

In [201]:
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  
n_clusters = [n_clusters_cre[cre] for cre in cre_lines]

for metric in cluster_stats.columns[2:]:
    try:
        figsize=(15,2.5)
        fig, ax = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios':n_clusters}, sharey=True)
        for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
            data = cluster_stats[cluster_stats.cre_line==cre_line]
            ax[i] = sns.barplot(data=data, x='cluster_id', order=cluster_order[cre_line], y=metric, 
                                hue='VISp_upper', palette='PRGn', ax=ax[i], dodge=False)
            ax[i].set_title(processing.get_cell_type_for_cre_line(cluster_meta, cre_line))
            ax[i].get_legend().remove()
        plt.subplots_adjust(wspace=0.4)
        utils.save_figure(fig, figsize, save_dir, 'pointplots_VISp_upper_sort', metric)
    except:
        print('problem for', metric)
problem for next_highest_conditions
In [ ]:
 

relationship of across session switching and Familiar session max to % cells in VISp or VISl

In [202]:
col_x = 'feature_sel_across_sessions'
col_y = 'VISp_upper'

colors = utils.get_cre_line_colors()

figsize=(4,4)
fig, ax = plt.subplots(figsize=figsize)
ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
                    hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
ax.get_legend().remove()
plt.axis("equal")
# for i in range(len(cluster_stats[col_x].values)):
#     xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#     color_index = cluster_stats['color_index'].values[i]
#     cluster_id = cluster_stats['cluster_id'].values[i]
#     plt.annotate(cluster_id, xy, color=colors[color_index])
utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
In [203]:
col_x = 'F_max'
col_y = 'VISp_upper'

colors = utils.get_cre_line_colors()

figsize=(4,4)
fig, ax = plt.subplots(figsize=figsize)
ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
                    hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
ax.get_legend().remove()
plt.axis("equal")
# for i in range(len(cluster_stats[col_x].values)):
#     xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#     color_index = cluster_stats['color_index'].values[i]
#     cluster_id = cluster_stats['cluster_id'].values[i]
#     plt.annotate(cluster_id, xy, color=colors[color_index])
utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
In [204]:
col_x = 'F_max'
col_y = 'VISl_upper'

colors = utils.get_cre_line_colors()

figsize=(4,4)
fig, ax = plt.subplots(figsize=figsize)
ax = sns.scatterplot(data=cluster_stats, x=col_x, y=col_y, size='fraction_cre', sizes=(0, 1000), 
                    hue='cre_line', hue_order=cre_lines, palette=colors, ax=ax)
ax.axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
ax.axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
ax.get_legend().remove()
plt.axis("equal")
# for i in range(len(cluster_stats[col_x].values)):
#     xy = (cluster_stats[col_x].values[i], cluster_stats[col_y].values[i])
#     color_index = cluster_stats['color_index'].values[i]
#     cluster_id = cluster_stats['cluster_id'].values[i]
#     plt.annotate(cluster_id, xy, color=colors[color_index])
utils.save_figure(fig, figsize, save_dir, 'scatterplots', col_x+'_'+col_y)
In [ ]:
 

boxplots of metric values for all cells in each cluster

first merge with fraction cells info per cluster

In [205]:
# create location column merging area and depth
cell_count_stats['location'] = cell_count_stats.targeted_structure+'_'+cell_count_stats.layer
# group & unstack to get fraction relative to random for each location as columns
fraction_cells = cell_count_stats[['cre_line', 'cluster_id', 'location', 'relative_to_random']].groupby(['cre_line', 'cluster_id', 'location']).mean().unstack()
# get rid of multi-index column
fraction_cells.columns = fraction_cells.columns.droplevel()
# merge fraction cells per location with other cluster metrics
cell_stats_loc = fraction_cells.reset_index().merge(cell_stats.reset_index(), on=['cre_line', 'cluster_id'])
cell_stats_loc = cell_stats_loc.set_index('cell_specimen_id')
In [207]:
cluster_order = processing.get_cluster_order_for_metric_location(cell_count_stats, cluster_meta, location='VISp_upper', metric='relative_to_random')  
n_clusters = [n_clusters_cre[cre] for cre in cre_lines]

for metric in cluster_stats.columns[2:]:
    try:
        figsize=(15,2.5)
        fig, ax = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios':n_clusters}, sharey=True)
        for i, cre_line in enumerate(processing.get_cre_lines(cluster_meta)):
            data = cell_stats_loc[cell_stats_loc.cre_line==cre_line]
            ax[i] = sns.boxplot(data=data, x='cluster_id', order=cluster_order[cre_line], y=metric, 
                                hue='VISp_upper', palette='PRGn', ax=ax[i], dodge=False)
            ax[i].set_title(processing.get_cell_type_for_cre_line(cluster_meta, cre_line))
            ax[i].get_legend().remove()
        plt.subplots_adjust(wspace=0.4)
        utils.save_figure(fig, figsize, save_dir, 'boxplots_VISp_upper_sort', metric)
    except:
        print('problem for', metric)
problem for next_highest_conditions
C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1236: RuntimeWarning:

invalid value encountered in double_scalars

C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning:

invalid value encountered in multiply

C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning:

invalid value encountered in multiply

C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning:

invalid value encountered in multiply

C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\numpy\lib\function_base.py:3968: RuntimeWarning:

invalid value encountered in multiply

C:\Users\marinag\Anaconda3\envs\visual_behavior_sdk\lib\site-packages\matplotlib\cbook\__init__.py:1236: RuntimeWarning:

invalid value encountered in double_scalars

problem for fraction_cre
problem for F_max
problem for N1_max
problem for N2_max
problem for n_cells_cluster
In [ ]:
 
In [ ]:
 
In [ ]:
 

Summary plots experimentation

In [ ]:
 

plot experience mod values for cluster average

In [ ]:
figsize = (17,5)
fig, ax = plt.subplots(1,3, figsize=figsize)
for i, cre_line in enumerate(cre_lines):
    data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    print(n_clusters)
    ax[i] = sns.scatterplot(data=data, x='exp_mod_direction', y='exp_mod_persistence', 
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
                            size='n_cells', sizes=(0, 1000), ax=ax[i])
    ax[i].axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
    ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
    ax[i].set_title(cell_types[cre_line]+'\n experience modulation')
    ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
    ax[i].set_xlabel('direction')
    ax[i].set_ylabel('persistence')
    ax[i].set_xlim(-1.2,1.2)
    ax[i].set_ylim(-1.2,1.2)
fig.tight_layout()
# utils.save_figure(fig, figsize, base_dir, folder, 'experience_modulation_avg_across_cells_in_cluster')

cells and clusters together

In [ ]:
figsize = (15,5)
fig, ax = plt.subplots(1,3, figsize=figsize)
for i, cre_line in enumerate(cre_lines):
    data = cell_stats[cell_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    ax[i] = sns.scatterplot(data=data, x='exp_mod_direction', y='exp_mod_persistence', alpha=0.2,
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], ax=ax[i])
    
    data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    ax[i] = sns.scatterplot(data=data, x='exp_mod_direction', y='exp_mod_persistence', 
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
                            size='n_cells', sizes=(0, 1000), ax=ax[i], alpha=0.7)
    
    ax[i].axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
    ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
    ax[i].set_title(cell_types[cre_line]+'\n experience modulation')
    ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
    ax[i].set_xlabel('direction')
    ax[i].set_ylabel('persistence')
    ax[i].set_xlim(-1.2,1.2)
    ax[i].set_ylim(-1.2,1.2)
fig.tight_layout()
# utils.save_figure(fig, figsize, base_dir, folder, 'experience_modulation_across_cells_and_clusters')
In [ ]:
 

feature selectivity within and across sesions

In [ ]:
# figsize = (15,5)
# fig, ax = plt.subplots(1,3, figsize=figsize)
# for i, cre_line in enumerate(cre_lines):
#     data = cell_stats[cell_stats.cre_line==cre_line]
#     data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
#     cluster_order = np.sort(data.cluster_id.unique())
#     n_clusters = len(cluster_order)
#     ax[i] = sns.scatterplot(data=data, x='feature_sel_within_session', y='feature_sel_across_sessions', alpha=0.2,
#                             hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], ax=ax[i])
    
#     data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
#     data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
#     cluster_order = np.sort(data.cluster_id.unique())
#     n_clusters = len(cluster_order)
#     ax[i] = sns.scatterplot(data=data, x='feature_sel_within_session', y='feature_sel_across_sessions', 
#                             hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
#                             size='n_cells', sizes=(0, 1000), ax=ax[i], alpha=0.7)
    
# #     ax[i].axvline(x=0, ymin=0, ymax=1, linestyle='--', color='gray')
# #     ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
#     ax[i].set_title(cell_types[cre_line]+'\n feature selectivity')
#     ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
#     ax[i].set_xlabel('within session')
#     ax[i].set_ylabel('across sessions')
#     ax[i].set_xlim(-0.1,1.1)
#     ax[i].set_ylim(-0.1,1.1)
# fig.tight_layout()
# # utils.save_figure(fig, figsize, base_dir, folder, 'feature_selectivity_across_cells_and_clusters')

feature selectivity across sessions vs. experience selectivity

  • cell switching is indicated by low feature_sel_across_sessions and high experience selectivity
In [ ]:
figsize = (17,5)
fig, ax = plt.subplots(1,3, figsize=figsize)
for i, cre_line in enumerate(cre_lines):
    data = avg_cluster_stats[avg_cluster_stats.cre_line==cre_line]
    data = data[data.cluster_id<10] # TEMPORARY to make sure we dont exceed # clusters in palette
    cluster_order = np.sort(data.cluster_id.unique())
    n_clusters = len(cluster_order)
    print(n_clusters)
    ax[i] = sns.scatterplot(data=data, x='experience_selectivity', y='feature_sel_across_sessions', 
                            hue='cluster_id', hue_order=cluster_order, palette=sns.color_palette()[:n_clusters], 
                            size='n_cells', sizes=(0, 1000), ax=ax[i])
#     ax[i].axvline(x=0, ymin=-1, ymax=1, linestyle='--', color='gray')
#     ax[i].axhline(y=0, xmin=-1, xmax=1, linestyle='--', color='gray')
    ax[i].set_title(cell_types[cre_line])
    ax[i].legend(fontsize='small', title_fontsize='small', bbox_to_anchor=(1,1))
    ax[i].set_xlabel('experience modulation')
    ax[i].set_ylabel('feature selectivity across sessions')
    ax[i].set_xlim(0, 1.1)
    ax[i].set_ylim(0, 1.1)
fig.tight_layout()
# utils.save_figure(fig, figsize, base_dir, folder, 'feature_sel_exp_mod_avg_across_cells_in_cluster')
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

summarize

In [ ]:
n_clusters_per_cre = cell_stats.groupby(['cre_line']).count().rename(columns={'cluster_id':'n_cells_total'})[['n_cells_total']]
n_clusters_per_feature = cell_stats.groupby(['cre_line', 'dominant_feature']).count().rename(columns={'cluster_id':'n_cells'})[['n_cells']]
n_clusters_per_feature = n_clusters_per_feature.reset_index().merge(n_clusters_per_cre, on='cre_line')
n_clusters_per_feature['fraction_cells'] = n_clusters_per_feature['n_cells']/n_clusters_per_feature['n_cells_total']
In [ ]:
colors = utils.get_cre_line_colors()[::-1]

figsize=(4.5,3)
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(data=n_clusters_per_feature, x='dominant_feature', y='fraction_cells', hue='cre_line',
             palette=colors, order=features, hue_order=cre_lines, ax=ax)
ax.legend(fontsize='xx-small', title='', loc='upper right')
ax.set_ylabel('fraction of cells')
ax.set_xlabel('')
ax.set_title('feature preference')
ax.set_xticklabels(features, rotation=45)
In [ ]:
n_clusters_per_cre = cell_stats.groupby(['cre_line']).count().rename(columns={'cluster_id':'n_cells_total'})[['n_cells_total']]
n_clusters_per_feature = cell_stats.groupby(['cre_line', 'dominant_experience_level']).count().rename(columns={'cluster_id':'n_cells'})[['n_cells']]
n_clusters_per_feature = n_clusters_per_feature.reset_index().merge(n_clusters_per_cre, on='cre_line')
n_clusters_per_feature['fraction_cells'] = n_clusters_per_feature['n_cells']/n_clusters_per_feature['n_cells_total']
In [ ]:
colors = utils.get_cre_line_colors()[::-1]
experience_levels = np.sort(cluster_meta.experience_level.unique())

figsize=(4.5,3)
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(data=n_clusters_per_feature, x='dominant_experience_level', y='fraction_cells', hue='cre_line',
             palette=colors, order=experience_levels, hue_order=cre_lines, ax=ax)
ax.legend(fontsize='xx-small', title='', loc='upper right')
ax.set_ylabel('fraction of cells')
ax.set_xlabel('')
ax.set_title('experience level preference')
ax.set_xticklabels(experience_levels, rotation=45);

repeat but per cluster instead of across cells

In [ ]:
cell_stats = cluster_meta.copy()
for i, cell_specimen_id in enumerate(cell_stats.index.values):
    # get dropout scores per cell 
    cell_dropouts = dropouts[dropouts.cell_specimen_id==cell_specimen_id].groupby('experience_level').mean()[features]
    # get preferred regressor and experience level and save
    dominant_feature = cell_dropouts.stack().idxmax()[1]
    dominant_experience_level = cell_dropouts.stack().idxmax()[0]
    cell_stats.loc[cell_specimen_id, 'dominant_feature'] = dominant_feature
    cell_stats.loc[cell_specimen_id, 'dominant_experience_level'] = dominant_experience_level
    # get selectivity for feature & experience level 
    # feature selectivity is ratio of largest and next largest dropouts for the dominant experience level
    order = np.argsort(cell_dropouts.loc[dominant_experience_level])
    values = cell_dropouts.loc[dominant_experience_level].values[order[::-1]]
    feature_selectivity = (values[0]-(np.mean(values[1:])))/(values[0]+(np.mean(values[1:])))
    # experience selectivity is ratio of largest and next largest dropouts for the dominant feature
    order = np.argsort(cell_dropouts[dominant_feature])
    values = cell_dropouts[dominant_feature].values[order[::-1]]
    experience_selectivity = (values[0]-(np.mean(values[1:])))/(values[0]+(np.mean(values[1:])))
    cell_stats.loc[cell_specimen_id, 'feature_selectivity'] = feature_selectivity
    cell_stats.loc[cell_specimen_id, 'experience_selectivity'] = experience_selectivity
In [ ]:
cluster_stats.keys()
In [ ]:
min_size = 0
max_size = 1000

colors = utils.get_cre_line_colors()

figsize=(4,3)
fig, ax = plt.subplots(figsize=figsize)
sns.scatterplot(data=cluster_stats, x='cre_line', y='feature_selectivity', hue='cre_line',
             palette=colors, hue_order=cell_types, size='fraction_cre', sizes=(min_size, max_size), ax=ax)
ax.legend(fontsize='xx-small', title='', bbox_to_anchor=(1.1,1))
ax.set_ylabel('feature selectivity index')
ax.set_xlabel('')
ax.set_title('feature selectivity')
# ax.set_xticklabels([processing.get_cell_type_for_cre_line(cluster_meta, cre_line) for cre_line in cre_lines], rotation=45)
ax.set_xticklabels([cre_line.split('-')[0] for cre_line in cre_lines[::-1]], rotation=0)

ax.set_ylim(0,1)
In [ ]:
max_size = 0
max_size = 1000

figsize=(4,3)
fig, ax = plt.subplots(figsize=figsize)
sns.scatterplot(data=cluster_stats, x='cre_line', y='experience_selectivity', hue='cre_line', 
             palette=colors, hue_order=cell_types, size='fraction_cre', sizes=(min_size, max_size), ax=ax)
ax.legend(fontsize='x-small', title='', bbox_to_anchor=(1.1,1))
ax.set_ylabel('experience selectivity')
ax.set_xlabel('')
ax.set_title('experience selectivity')
# ax.set_xticklabels([processing.get_cell_type_for_cre_line(cluster_meta, cre_line) for cre_line in cre_lines], rotation=45)
ax.set_xticklabels([cre_line.split('-')[0] for cre_line in cre_lines[::-1]], rotation=0)

ax.set_ylim(0,1)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

details stuff / validation

count number of cells in different areas & depths

In [ ]:
make_frequency_table(cluster_meta, groupby_columns = ['cre_line', 'targeted_structure'], normalize=False)

There are way more cells in VISp than VISl

In [ ]:
make_frequency_table(cluster_meta, groupby_columns = ['cre_line', 'layer'], normalize=False)

There are way more cells in lower layers for Sst and upper layers for Vip

In [ ]:
make_frequency_table(cluster_meta, groupby_columns = ['cre_line', 'binned_depth'], normalize=False)

Numbers get pretty small for inhibitory lines when looking at depths in 4 bins

get frequency across areas & layer for one cre line

In [ ]:
cre_line = cre_lines[1]
print(cre_line)
In [ ]:
make_frequency_table(cluster_meta[cluster_meta.cre_line==cre_line], 
                     groupby_columns = ['targeted_structure', 'layer'], normalize=False)
In [ ]:
cre_meta = cluster_meta[cluster_meta.cre_line==cre_line]
make_frequency_table(cre_meta, groupby_columns = ['targeted_structure', 'layer'], normalize=True)
In [ ]:
 

plot frequency by area and depth

In [ ]:
cre_line = cre_lines[1]
cre_meta = cluster_meta[cluster_meta.cre_line==cre_line]
frequency = make_frequency_table(cre_meta, groupby_columns = ['targeted_structure', 'layer'], normalize=True)

Rows add up to 1

In [ ]:
fig, ax = plt.subplots(figsize=(6,2.5))
ax = sns.heatmap(frequency, vmin=0, cmap='magma', ax=ax, cbar_kws={'shrink':0.8, 'label':'fraction of cells'})
ax.set_ylim((0, 4))
# ax.set_yticklabels(frequency.index, rotation=0, horizontalalignment='center')
ax.set_xlim(-0.5, len(frequency.columns)+0.5)
ax.set_ylabel('')
ax.set_title(cell_types[cre_line])
fig.tight_layout()

normalizing to cluster size doesnt make sense

In [ ]:
stats_df = cre_meta[['cluster_id', 'binned_depth']]
frequency_table= stats_df.groupby('cluster_id')['binned_depth'].value_counts(normalize=False).unstack()
frequency_table= frequency_table.fillna(0)
frequency_table
In [ ]:
stats_df = cre_meta[['cluster_id', 'binned_depth']]
frequency_table= stats_df.groupby('cluster_id')['binned_depth'].value_counts(normalize=True).unstack()
frequency_table= frequency_table.fillna(0)
frequency_table
In [ ]:
sns.heatmap(frequency_table)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

plots with individual cells per cluster

In [ ]:
# get dropouts for some specific condition and add to cre meta for plotting
condition = ('all-images', 'Familiar')
metric_data = df[condition]
metric_data = metric_data.rename(columns={('all-images', 'Novel 1'):'metric'})
metric_data = pd.DataFrame(metric_data, columns=['metric'])
metric_meta = cre_meta.merge(metric_data, on='cell_specimen_id')
In [ ]:
fig, ax = plt.subplots()
ax = sns.scatterplot(data=metric_meta, y='imaging_depth', x='metric', hue='cluster_id', palette='magma_r', ax=ax)
ax.legend(bbox_to_anchor=(1,1), )
In [ ]:
fig, ax = plt.subplots()
ax = sns.pointplot(data=metric_meta, y='binned_depth', x='metric', hue='cluster_id', 
                   orient='h', join=False, dodge=0.5, palette='magma_r', ax=ax)
ax.legend(bbox_to_anchor=(1,1), )
In [ ]:
# get dropouts for some specific condition and add to cre meta for plotting
condition = ('behavioral', 'Novel 1')
metric_data = df[condition]
metric_data = metric_data.rename(columns={('all-images', 'Novel 1'):'metric'})
metric_data = pd.DataFrame(metric_data, columns=['metric'])
metric_meta = cre_meta.merge(metric_data, on='cell_specimen_id')
In [ ]:
fig, ax = plt.subplots()
ax = sns.scatterplot(data=metric_meta, y='imaging_depth', x='metric', hue='cluster_id', palette='magma_r', ax=ax)
ax.legend(bbox_to_anchor=(1,1), )
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
area_df = pd.DataFrame(frequency_table_area.unstack(), columns=['fraction']).reset_index()
area_df = area_df.groupby(['cluster_id', 'targeted_structure']).mean().unstack()
area_df.columns = area_df.columns.droplevel()
fig, ax = plt.subplots(figsize=(6,2))
ax = sns.heatmap(area_df.T, vmin=0, cmap='magma', ax=ax, cbar_kws={'shrink':0.8, 'label':'fraction cells\nper area'})
ax.set_ylim((0, 2))
ax.set_yticklabels(area_df.T.index, rotation=0, horizontalalignment='center')
ax.set_ylabel('')
ax.set_xlim(-0.5, len(area_df)+0.5)
fig.tight_layout()
In [ ]:
depth_df = pd.DataFrame(frequency_table_depth.unstack(), columns=['fraction']).reset_index()
depth_df = depth_df.groupby(['cluster_id', 'binned_depth']).mean().unstack()
depth_df.columns = depth_df.columns.droplevel()

fig, ax = plt.subplots(figsize=(6,2.5))
ax = sns.heatmap(depth_df.T, vmin=0, cmap='magma', ax=ax, cbar_kws={'shrink':0.8, 'label':'fraction cells\nper depth'})
ax.set_ylim((0, 4))
ax.set_yticklabels(depth_df.T.index, rotation=0, horizontalalignment='center')
ax.set_xlim(-0.5, len(depth_df)+0.5)
ax.set_ylabel('')
fig.tight_layout()
In [ ]:
fig, ax = plt.subplots
sns.barplot(data=area_df, x='cluster_id', y='fraction', hue='targeted_structure')
In [ ]:
frequency_table_depth
In [ ]: